* by Atsushi Yamagishi
* This data plots population aggregated by the distance from the epicenter
* We do not drop samples not used in the main part to keep consistecy with shisei-youran data
cap log close
clear all
set more off 


***
* Choose which version of 1945 population to use

* 1945 population data version. 1 = Building damage from 原爆戦災誌 2 = pre-post poulation ratio from 市政要覧. 3: mix of the two.
local subvar = 3

import dbase using "../../data/raw data/popemp_33_75_cleaned6.dbf"
global datadir "../../data/raw data"
global figuredir "../../output/figure"


*****************************
* Part 1: Using 1934-60 data
*****************************

* Renaming variables 
rename 旧町名 block
rename 全壊建 tdest_build 
rename 半壊建 halfdamage_build 
rename 小破建 smalldamage_build 
rename 無事建 nodamage_build 
rename 即死率 death_rate 
rename 負傷率 injury_rate
rename 無傷率 noinjury_rate 
rename 被爆数 total_hibaku 
rename 直爆数 total_chokubaku 
rename _45死数 total_death
drop lon lat 
* These coordiantes are calculated in ESPG4612. Centroids of the popemp_33_75_cleaned5.shp
rename xcoord lon
rename ycoord lat
rename area_3 size
* Convert the area size from m^2 to km^2
replace size = size/1000000


* Merge the distance from epicenter. Source = 原爆戦災誌　第一巻
merge 1:1 block using "$datadir/広島原爆デジタルアトラス/distance_epicenter_atlas.dta"

* If these rate are all zero, it means missing values for damages
gen missingdamage_human = (death_rate+injury_rate+noinjury_rate==0)
replace death_rate =. if missingdamage_human == 1
replace injury_rate =. if missingdamage_human == 1
replace noinjury_rate =. if missingdamage_human == 1
replace total_hibaku =. if missingdamage_human == 1
replace total_chokubaku =. if missingdamage_human == 1
replace total_death =. if missingdamage_human == 1

* No missing record for building damage.
gen missingdamage_build = (tdest_build + halfdamage_build + smalldamage_build + nodamage_build==0)
tab missingdamage_build

* Calculating population and household density for each year 
foreach t in 33 34 35 36 51 52 53 55 60{
  gen popdens`t' = pop`t'/size
  gen l_popdens`t' = ln(pop`t'/size) 
  gen hhdens`t' = hh`t'/size
  gen l_hhdens`t' = ln(hh`t'/size) 
}

* Calculate the implied population by bomb damage (i.e., estimated population distribution right after bombing.
* 1936 population * death_rate. Alternatively, can take into account that some injured people also died subsequently.
* To deal with 100% death rate, I assume the minimum 0.5% survival rate (might be absent at the day of bombing). See robustness

* Using the building damage adecuately captures that the center had nearly no population after bombing.
* Note there is no missing value for building damage.
* Note also that this measure does not take into account that survivers espcaped to suburban aras and they saw population increase.
* Maybe can validate the measure by using some partial info from 原爆戦災誌. sometimes local population some months afte bombing is recorded.
gen surviverate = 1/100 
replace surviverate = 1 - (tdest_build /100) if 100 - tdest_build > 0.5

gen surviverate2 = 1455/60894
replace surviverate2 = 5925 / 65578 if distance_epicenter == 3
replace surviverate2 = 11472 / 64954 if distance_epicenter == 4
replace surviverate2 = 31072 / 52150 if distance_epicenter == 5
replace surviverate2 = 27458 / 27168 if distance_epicenter == 6
replace surviverate2 = 59136 / 41523 if distance_epicenter >6

* Aggregate more than 3km, within 1km
gen distance_epicenter2 = distance_epicenter
replace distance_epicenter2 = 7 if distance_epicenter>6
replace distance_epicenter2=1 if distance_epicenter < 3

gen pop36_r = round(pop36, 1)

frame copy default new
frame new{

collapse (mean) surviverate2 tdest_build halfdamage_build [fw = pop36_r], by(distance_epicenter2) 

gen tdest_build2 = tdest_build^2

reg surviverate2 tdest_build tdest_build2
gen tdest_0 = _b[_cons]
gen tdest_1 = _b[tdest_build]
gen tdest_2 = _b[tdest_build2]
}

* _frval() returns values of variables stored in other frames. It returns var's ith observation (here, 1st)

gen tdest_0 = _frval(new, tdest_0, 1)
gen tdest_1 = _frval(new, tdest_1, 1)
gen tdest_2 = _frval(new, tdest_2, 1)


gen tdest_build2 = tdest_build^2


if `subvar' == 3{
gen surviverate3 = tdest_1*tdest_build + tdest_2*tdest_build2 + tdest_0
gen pop45bomb = pop36*surviverate3
gen popdens45bomb = pop45bomb / size
gen l_popdens45bomb = ln(popdens45bomb)
* Although this survive rate is based on population, use this for establishments too
*gen est45bomb = est38*surviverate3
*gen estdens45bomb = est45bomb / size
*gen l_estdens45bomb = ln(estdens45bomb)
* In this case, obs is not meaningful if building damage is missingdamage_build
drop if missingdamage_build==1
}
* To construct a placebo, get the population change rate from 1934--36 (pre-trend).
* Take the geometric averate to minimize idiosyncracy. pretrend is a 1-year growth rate.
gen pretrend1 = pop34/pop33
gen pretrend2 = pop35/pop34
gen pretrend3 = pop36/pop35
gen pretrend = (pretrend1 * pretrend2 * pretrend3)^(1/3)

* High density means lower growth rate, so even the fast-growing areas would stop growing in the end.
* But linear pre-trend assumption means that this is not taken into account.
reg pretrend l_popdens33

* How to deal with extreme growth at the block level?
* Censor if 4 times growth in 15 years. (1.1^15 >4)
* Same for low trend
replace pretrend = 1.1 if pretrend>1.1
replace pretrend = 0.9 if pretrend < 0.9

label var block "Name"
label var tdest_build "Rate of total destroy Building"
label var halfdamage_build "Rate of half damage buildings"
label var smalldamage_build "Rate of small damage buildings"
label var nodamage_build "Rate of no damage building"
label var death_rate "Rate of immediate death" 
label var injury_rate "Rate of injury" 


* CBD
gen temp_lat = lat if block == "八丁堀"
gen temp_lon = lon if block == "八丁堀"
replace temp_lat = lat if block == "紙屋町"
replace temp_lon = lon if block == "紙屋町"
replace temp_lat = 0 if temp_lat == . 
replace temp_lon = 0 if temp_lon ==. 

egen cbd_lat = sum(temp_lat)
replace cbd_lat = cbd_lat/2
egen cbd_lon = sum(temp_lon) 
replace cbd_lon = cbd_lon/2

drop temp_lat temp_lon

* Compute distance to CBD
geodist cbd_lat cbd_lon lat lon, generate(cbd_dist)

label var cbd_dist "Distance to CBD, km"

* Focus within 6km to eliminate outliers (like 似島）
drop if cbd_dist>6



* ***********************************************************************
* Comparison with pre-trend (placebo)
* Assume that linear population trend in 1934-36 continues since Dec 1936.
* ***********************************************************************
* Monthly-rate of pre-trend
gen pretrend_m = (pretrend)^(1/12)

* Add the survey month of population
rename pop33 pop3312
rename pop34 pop3412
rename pop35 pop3512
rename pop36 pop3612

rename pop51 pop5112
rename pop52 pop5212
rename pop53 pop5312
rename pop55 pop5501

rename pop60 pop6010


* Note that 1951 population is at the end of 1951, so almost 1952. 
gen cpop5112 = pop3612*(pretrend_m^180)

gen cpopdens5112 = cpop5112/size
gen l_cpopdens5112 = ln(cpopdens5112)

gen cpop5501 = pop3612*(pretrend_m^216)

gen cpopdens5501 = cpop5501/size
gen l_cpopdens5501 = ln(cpopdens5501)

gen cpop6010 = pop3612*(pretrend_m^285)

gen cpopdens6010 = cpop6010/size
gen l_cpopdens6010 = ln(cpopdens6010)


* Other years 
gen cpop4511 = pop3612*(pretrend_m^106)
gen cpop4604 = pop3612*(pretrend_m^111)
gen cpop4608 = pop3612*(pretrend_m^115)
gen cpop4612 = pop3612*(pretrend_m^119)
gen cpop4708 = pop3612*(pretrend_m^127)
gen cpop4808 = pop3612*(pretrend_m^139)

gen cpop5212 = pop3612*(pretrend_m^192)
gen cpop5312 = pop3612*(pretrend_m^204)


*** change the definition of dist. to match　市勢要覧要覧
gen d_epi = 1 
replace d_epi = 2 if distance_epicenter==3
replace d_epi = 3 if distance_epicenter==4
replace d_epi = 4 if distance_epicenter==5
replace d_epi = 5 if distance_epicenter==6
replace d_epi = 6 if distance_epicenter>=7



***
* Create data at the aggregate level (~km from the CBD).


collapse (sum) pop3312 pop3412 pop3512 pop3612 pop5112 pop5212 pop5312 pop5501 pop6010 cpop4511 cpop4604  cpop4608  cpop4612 cpop4708 cpop4808 cpop5112 cpop5212 cpop5312 cpop5501 cpop6010, by(d_epi) 

***
*Add population in 1945, 1946, 1947, 1948 from 広島市勢要覧 S23
***


* November 1945.
gen pop4511 = 0
replace pop4511 = 1455 if d_epi==1
replace pop4511 = 5925 if d_epi==2
replace pop4511 = 11472 if d_epi==3
replace pop4511 = 31072 if d_epi==4
replace pop4511 = 27458 if d_epi==5
replace pop4511 = 59136 if d_epi==6

gen tpop4511 = 136518

* April 1946.
gen pop4604 = 0
replace pop4604 = 3259 if d_epi==1
replace pop4604 = 9503 if d_epi==2
replace pop4604 = 16626 if d_epi==3
replace pop4604 = 41489 if d_epi==4
replace pop4604 = 29742 if d_epi==5
replace pop4604 = 70585 if d_epi==6

gen tpop4604 = 171204

* August 1946.
gen pop4608 = 0
replace pop4608 = 11198 if d_epi==1
replace pop4608 = 14327 if d_epi==2
replace pop4608 = 18752 if d_epi==3
replace pop4608 = 46928 if d_epi==4
replace pop4608 = 31347 if d_epi==5
replace pop4608 = 65567 if d_epi==6

gen tpop4608 = 188119

* Dec 1946.
gen pop4612 = 0
replace pop4612 = 13815 if d_epi==1
replace pop4612 = 16916 if d_epi==2
replace pop4612 = 20278 if d_epi==3
replace pop4612 = 48317 if d_epi==4
replace pop4612 = 32584 if d_epi==5
replace pop4612 = 68973 if d_epi==6

gen tpop4612 = 200883

* Aug 1947.
gen pop4708 = 0
replace pop4708 = 15907 if d_epi==1
replace pop4708 = 17603 if d_epi==2
replace pop4708 = 24346 if d_epi==3
replace pop4708 = 48911 if d_epi==4
replace pop4708 = 37068 if d_epi==5
replace pop4708 = 68310 if d_epi==6

gen tpop4708 = 212235

* Aug 1948.
gen pop4808 = 0
replace pop4808 = 25239 if d_epi==1
replace pop4808 = 28484 if d_epi==2
replace pop4808 = 30715 if d_epi==3
replace pop4808 = 54388 if d_epi==4
replace pop4808 = 37744 if d_epi==5
replace pop4808 = 69564 if d_epi==6

gen tpop4808 = 246134

* Total population in each year
foreach t in 3312 3412 3512 3612 5112 5212 5312 5501 6010{
egen tpop`t' = total(pop`t')
}
* Relative population share for each bin
foreach t in 3312 3412 3512 3612 4511 4604 4608 4612 4708 4808 5112 5212 5312 5501 6010{
gen rpop`t' = pop`t' / tpop`t'
}

* Total population in 1951 or 55 When taking into account the pretrend.
foreach t in 4511 4604 4608 4612 4708 4808 5112 5212 5312 5501 6010{
egen t_cpop`t' = total(cpop`t')
}
* Relative population share in the counterfactual
foreach t in 4511 4604 4608 4612 4708 4808 5112 5212 5312 5501 6010{
gen r_cpop`t' = cpop`t' / t_cpop`t'
}


reshape long pop cpop tpop rpop t_cpop r_cpop, i(d_epi) j(year_month)
tostring year_month, gen(year_month_s)
gen year = substr(year_month_s, 1, 2)
gen month = substr(year_month_s, 3, 2)
destring year month, replace
replace year = year+1900
replace r_cpop = rpop if year<1945

replace year = year + (month-1)/12

* Within 1km population share
keep if d_epi==1
#d;
twoway (line rpop year, lcolor(black)) (line r_cpop year, lcolor(black) lpattern(dash)),
    legend(label(1 Actual) label(2 Prediction))
	scheme(s1color) plotregion(lwidth(none) ilwidth(none))
	xlabel(1933.917 "Dec 1933" 1934.917 "Dec 1934" 1935.917 "Dec 1935" 1936.917 "Dec 1936" 1945.833 "Nov 1945" 1946.25 "Apr 1946" 1946.583 "Aug 1946" 1946.917 "Dec 1946" 1947.583 "Aug 1947" 1948.583 "Aug 1948" 1951.917 "Dec 1951" 1952.917 "Dec 1952" 1953.917 "Dec 1953" 1955 "Jan 1955" 1960.75 "Oct 1960")
	xtitle("Year")
	ytitle("Population share, within 1km from the epicenter") xlabel(, angle(vertical) labsize(vsmall)) ylabel(, angle(horizontal));
	graph export "$figuredir/3360_dwlike_epicenter.pdf", as(pdf)   replace ;
#d cr


cap log close 

